package com.chajia.pageprocess;

import com.chajia.dao.ProductDao;
import com.chajia.util.LogUtil;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.math.BigInteger;

/**
 * WebMagic {@link PageProcessor} for product detail pages on s.etao.com.
 *
 * <p>Every processed page does two things:
 * <ol>
 *   <li>queues further detail-page URLs by counting product ids upward from an
 *       in-memory cursor ({@code pidIntLast}) until {@code PIDINTMAX} is reached;</li>
 *   <li>extracts the product id, URL, title, price and image of the current page
 *       into the page's result fields, unless the page is a "no-good" page or the
 *       product is already known and not useful.</li>
 * </ol>
 *
 * <p>NOTE(review): {@code pidIntLast} is mutated without synchronization. If the
 * spider invokes this processor from several threads, ids may be queued twice or
 * skipped -- confirm how the spider is configured.
 *
 * <p>Created with IntelliJ IDEA on 13-12-30 at 1:33 AM.
 *
 * @author chenchang
 */
public class EtaoProcess implements PageProcessor {
    public static Class clazz= EtaoProcess.class;

    /** Maximum number of follow-up detail pages queued per processed page. */
    private static final int BATCH_SIZE = 50;

    private Site site = Site.me().setDomain("s.etao.com").setSleepTime(500);

    /** Highest product id that may be queued (inclusive). */
    BigInteger PIDINTMAX=new BigInteger("35031015935");

    /** Most recent product id that has been queued for crawling. */
    private  BigInteger pidIntLast=new BigInteger("35031005935");

    /**
     * Queues the next batch of detail pages and extracts the product data of the
     * given page.
     *
     * <p>The page is marked as skipped (and nothing is extracted) when
     * {@code ProductDao} reports the product as existing but not useful. If the
     * lookup itself fails, the error is logged and extraction proceeds.
     *
     * @param page the downloaded page handed over by the spider
     */
    @Override
    public void process(Page page) {
        enqueueNextProductPages(page);

        Selectable url = page.getUrl();
        // The first run of digits in the URL is taken as the product id.
        String pid = url.regex("\\d+").toString();
        try {
            if (ProductDao.isExist(pid) && !ProductDao.isUseful(pid)) {
                page.setSkip(true);
                // A skipped page never reaches the pipelines, so extracting is pointless.
                return;
            }
        } catch (Exception e) {
            LogUtil.error(clazz, e.getMessage());
        }

        Selectable html = page.getHtml();
        // A div with id "no-good" marks a page without a product; extract nothing then.
        if (html.xpath("//div[@id='no-good']").toString() != null) {
            return;
        }

        page.putField("pid", pid);
        page.putField("url", url.toString());
        page.putField("title", html.xpath("//h1[@class='product-title']/text()").toString());
        page.putField("price", extractPrice(html));
        page.putField("img", html.xpath("//div[@class='product-picture']/@img-src").toString());
    }

    /**
     * Queues up to {@link #BATCH_SIZE} consecutive detail pages, advancing
     * {@code pidIntLast} by one per queued page and never going past
     * {@code PIDINTMAX}.
     *
     * @param page the page whose target-request list receives the new URLs
     */
    private void enqueueNextProductPages(Page page) {
        // compareTo only yields -1, 0 or 1, so the bound has to be checked with "< 0";
        // checking it on every iteration keeps a batch from overshooting the maximum.
        for (int i = 0; i < BATCH_SIZE && pidIntLast.compareTo(PIDINTMAX) < 0; i++) {
            pidIntLast = pidIntLast.add(BigInteger.ONE);
            try {
                page.addTargetRequest(
                        new Request("http://s.etao.com/detail/" + pidIntLast.toString() + ".html"));
            } catch (Exception e) {
                // Best effort: a failure to queue one id must not abort the rest of the batch.
                LogUtil.error(clazz, e.getMessage());
            }
        }
    }

    /**
     * Reads the original-price text and drops its first character
     * (presumably a currency symbol -- verify against the live markup).
     *
     * @param html the parsed page content
     * @return the price text without its first character, or {@code null} when the
     *         price element is missing or empty
     */
    private String extractPrice(Selectable html) {
        String rawPrice = html.xpath("//li/span[@class='original-price']/text()").toString();
        if (rawPrice == null || rawPrice.isEmpty()) {
            return null;
        }
        return rawPrice.substring(1);
    }

    /**
     * @return the site configuration (domain s.etao.com, 500 ms sleep time)
     */
    @Override
    public Site getSite() {
        return site;
    }
}
